package com.itxing.utils;

import com.itxing.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes product listings from the JD.com search result page using jsoup.
 *
 * <p>Usage example:
 * {@code new HtmlParseUtil().parseJD("java").forEach(System.out::println);}
 *
 * @author xing
 * @create 2020/7/19-studyes
 */
public class HtmlParseUtil {

    /** Id of the DOM element that wraps the product list on the search page. */
    private static final String GOODS_LIST_ID = "J_goodsList";

    /** Timeout, in milliseconds, for fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the JD search page for the given keyword and extracts one
     * {@link Content} per product entry that has an image, a price and a title.
     *
     * <p>Only the statically served HTML is parsed; content that the page loads
     * later via AJAX is not available.
     *
     * @param keyWords the search keyword; it is URL-encoded (UTF-8) before being
     *                 placed in the query string
     * @return a mutable list of the parsed entries; empty when the product list
     *         container is missing from the page or no entry has all three fields
     * @throws IllegalArgumentException if {@code keyWords} is {@code null}
     * @throws Exception if the page cannot be fetched or parsed
     */
    public List<Content> parseJD(String keyWords) throws Exception {
        if (keyWords == null) {
            throw new IllegalArgumentException("keyWords must not be null");
        }

        // Encode the keyword so spaces, '&', '#', and non-ASCII text cannot corrupt the query string.
        String encodedKeyWords = URLEncoder.encode(keyWords, StandardCharsets.UTF_8.name());
        String url = "https://search.jd.com/Search?keyword=" + encodedKeyWords + "&enc=utf-8";

        // Fetch and parse the page. AJAX-loaded content cannot be obtained this way.
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        List<Content> list = new ArrayList<>();

        // The Document exposes the same lookup methods as the DOM API in JavaScript.
        Element goodsList = document.getElementById(GOODS_LIST_ID);
        if (goodsList == null) {
            // The container is absent (e.g. the page layout differs from what is expected);
            // report "no results" instead of failing with a NullPointerException.
            return list;
        }

        // Each product entry is an <li> inside the container.
        Elements items = goodsList.getElementsByTag("li");
        for (Element item : items) {
            // Most sites lazy-load resources, so "src" may be empty for some entries.
            String img = item.getElementsByTag("img").eq(0).attr("src");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            // Compare by content: '!=' on strings tests reference identity, not emptiness.
            if (!img.isEmpty() && !price.isEmpty() && !title.isEmpty()) {
                Content content = new Content();
                content.setImg(img);
                content.setPrice(price);
                content.setTitle(title);
                list.add(content);
            }
        }

        return list;
    }
}
